Activating Libraries

library(readxl)
library(ggpubr)
library(ggsci)
library(dplyr)
library(rstatix)

# Function use to filter data that do not contain an element or list of elements with dplyr
`%notin%` <- Negate(`%in%`)

Data wrangling

Importing data

setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/tables/metadata/")
data <- read_excel("IMM_ceftiofur_metadata.xlsx", sheet = "metadata")
microbiome <- read_excel("IMM_ceftiofur_metadata.xlsx", sheet = "metaphlan")
metagenome_stats <- read_excel("IMM_ceftiofur_metadata.xlsx", sheet = "metagenome_stats")
temperature <- read_excel("IMM_ceftiofur_metadata.xlsx", sheet = "temperature")
diet <- read_excel("IMM_ceftiofur_metadata.xlsx", sheet = "diet")

Merging tables for metadata

metadata <- merge(data, diet) %>% 
  merge(temperature) %>%
  merge(metagenome_stats) %>% 
  merge(microbiome) %>% 
  mutate(Treatment = factor(Treatment, levels = c("Control", "Antibiotic")),
          Time_tx = factor(Time_tx, levels = c("Day -1", "Week 1", "Week 5", "Week 9")))

Normalizing abundance based on Genome equivalents

metadata <- metadata %>% mutate(normalized_abundance = Reads/genome_equivalents*100)

Proportion of reads mapping taxa

Calculating proportion of reads per taxa based on the number of Non-host reads

metadata <- metadata %>% mutate(proportion_reads = Reads/reads_nonhost*100)

metadata %>% 
  filter(Taxa %in% c("k__Bacteria","k__Archaea","k__Eukaryota")) %>% 
  group_by(sample_ID, Time_tx,Treatment,reads_nonhost,Taxa) %>% 
  dplyr::summarise(sum = sum(Reads)) %>% 
  mutate(proportion = sum/reads_nonhost*100) %>% 
  group_by(Taxa) %>% 
  dplyr::summarise(mean=mean(proportion),sd = sd(proportion))

Importing counts kraken-bracken

kraken <- read_excel("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/tables/metadata/IMM_ceftiofur_metadata.xlsx", sheet = "bracken")
kraken$Taxa <- trimws(kraken$Taxa_bracken)

# Merging kraken and metadata
metadata_kraken <- merge(data, diet) %>% 
  merge(temperature) %>%
  merge(metagenome_stats) %>% 
  merge(kraken) %>% 
  mutate(Treatment = factor(Treatment, levels = c("Control", "Antibiotic")),
          Time_tx = factor(Time_tx, levels = c("Day -1", "Week 1", "Week 5", "Week 9"))) %>% mutate(normalized_abundance = reads_bracken/genome_equivalents*100)

Calculating domain proportion from bracken

metadata_kraken %>% filter(level == "D", Taxa_bracken %in% c("Viruses","Bacteria","Archaea","Eukaryota")) %>% 
  group_by(sample_ID, Time_tx,Treatment,reads_nonhost,Taxa_bracken) %>% 
  dplyr::summarise(sum = sum(reads_bracken)) %>% 
  mutate(proportion = sum/reads_nonhost*100) %>% 
  group_by(Taxa_bracken) %>% 
  dplyr::summarise(mean=mean(proportion),sd = sd(proportion))

Linear-mixed effect models

Bacteria

library(nlme)
bac <- metadata %>% 
  filter(Taxa %in% c("k__Bacteria")) 
lm.test <- lme(Reads ~ days_tx*temperature_Celsius+diet*days_tx+Treatment, random=~1|study_ID, data = bac)
summary(lm.test)
## Linear mixed-effects model fit by REML
##   Data: bac 
##        AIC      BIC    logLik
##   4114.186 4147.303 -2046.093
## 
## Random effects:
##  Formula: ~1 | study_ID
##         (Intercept) Residual
## StdDev:     57401.9 164376.6
## 
## Fixed effects:  Reads ~ days_tx * temperature_Celsius + diet * days_tx + Treatment 
##                                Value Std.Error  DF    t-value p-value
## (Intercept)                   422279    133946 112  3.1525977  0.0021
## days_tx                        -1355      2834 112 -0.4780378  0.6336
## temperature_Celsius             4463      5979 112  0.7464376  0.4570
## dietFresh                   -8796493  10979179 112 -0.8011977  0.4247
## dietMaintenance               126343    105070 112  1.2024668  0.2317
## TreatmentAntibiotic            -6471     32029  38 -0.2020395  0.8410
## days_tx:temperature_Celsius      -46       116 112 -0.3991463  0.6905
## days_tx:dietFresh             139756    174204 112  0.8022544  0.4241
## days_tx:dietMaintenance       -37741    104264 112 -0.3619785  0.7181
##  Correlation: 
##                             (Intr) dys_tx tmpr_C dtFrsh dtMntn TrtmnA dy_:_C
## days_tx                     -0.869                                          
## temperature_Celsius         -0.959  0.808                                   
## dietFresh                    0.017 -0.008 -0.010                            
## dietMaintenance              0.038  0.011 -0.125  0.004                     
## TreatmentAntibiotic         -0.205  0.086  0.091 -0.067  0.024              
## days_tx:temperature_Celsius  0.840 -0.886 -0.876  0.010  0.121 -0.098       
## days_tx:dietFresh           -0.015  0.004  0.009 -1.000 -0.005  0.067 -0.008
## days_tx:dietMaintenance      0.141 -0.132 -0.149  0.003  0.913  0.026  0.138
##                             dys_:F
## days_tx                           
## temperature_Celsius               
## dietFresh                         
## dietMaintenance                   
## TreatmentAntibiotic               
## days_tx:temperature_Celsius       
## days_tx:dietFresh                 
## days_tx:dietMaintenance     -0.003
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.0010430 -0.5119372 -0.1218996  0.3679717  4.0702307 
## 
## Number of Observations: 159
## Number of Groups: 40
anova(lm.test)

Eukaryota

library(nlme)
euk <- metadata %>% 
  dplyr::filter(Taxa %in% c("k__Eukaryota"))
lm.test <- lme(Reads ~ days_tx, random=~1|study_ID, data = euk)
summary(lm.test)
## Linear mixed-effects model fit by REML
##   Data: euk 
##        AIC      BIC    logLik
##   19.79255 11.79255 -5.896275
## 
## Random effects:
##  Formula: ~1 | study_ID
##         (Intercept) Residual
## StdDev:    7.282932 2.731099
## 
## Fixed effects:  Reads ~ days_tx 
##                Value Std.Error DF   t-value p-value
## (Intercept) 8725.313  4.909732  1 1777.1464  0.0004
## days_tx        0.812  1.190785  1    0.6823  0.6188
##  Correlation: 
##         (Intr)
## days_tx -0.404
## 
## Standardized Within-Group Residuals:
##         MA014         MA015         MA009 
##  2.482818e-01 -6.660283e-13 -2.482818e-01 
## attr(,"label")
## [1] "Standardized residuals"
## 
## Number of Observations: 3
## Number of Groups: 3
anova(lm.test)

Archaea

library(nlme)
arc <- metadata %>% 
  filter(Taxa %in% c("k__Archaea")) 
lm.test <- lme(Reads ~ days_tx*temperature_Celsius+diet*days_tx+Treatment, random=~1|study_ID, data = arc)
summary(lm.test)
## Linear mixed-effects model fit by REML
##   Data: arc 
##        AIC      BIC    logLik
##   3471.262 3504.379 -1724.631
## 
## Random effects:
##  Formula: ~1 | study_ID
##         (Intercept) Residual
## StdDev:    8585.423 18786.82
## 
## Fixed effects:  Reads ~ days_tx * temperature_Celsius + diet * days_tx + Treatment 
##                                  Value Std.Error  DF   t-value p-value
## (Intercept)                     7559.5   15661.6 112  0.482680  0.6303
## days_tx                           94.4     327.1 112  0.288434  0.7735
## temperature_Celsius              833.7     698.1 112  1.194321  0.2349
## dietFresh                   -1977812.0 1276741.2 112 -1.549110  0.1242
## dietMaintenance                50650.3   12193.3 112  4.153946  0.0001
## TreatmentAntibiotic              110.7    4058.9  38  0.027270  0.9784
## days_tx:temperature_Celsius      -13.4      13.4 112 -1.000566  0.3192
## days_tx:dietFresh              31621.3   20257.6 112  1.560958  0.1214
## days_tx:dietMaintenance        16435.9   12134.9 112  1.354430  0.1783
##  Correlation: 
##                             (Intr) dys_tx tmpr_C dtFrsh dtMntn TrtmnA dy_:_C
## days_tx                     -0.864                                          
## temperature_Celsius         -0.958  0.808                                   
## dietFresh                    0.019 -0.010 -0.013                            
## dietMaintenance              0.043  0.002 -0.128  0.006                     
## TreatmentAntibiotic         -0.207  0.079  0.082 -0.062  0.022              
## days_tx:temperature_Celsius  0.834 -0.888 -0.872  0.013  0.127 -0.089       
## days_tx:dietFresh           -0.018  0.007  0.012 -1.000 -0.007  0.062 -0.011
## days_tx:dietMaintenance      0.144 -0.137 -0.153  0.006  0.915  0.023  0.144
##                             dys_:F
## days_tx                           
## temperature_Celsius               
## dietFresh                         
## dietMaintenance                   
## TreatmentAntibiotic               
## days_tx:temperature_Celsius       
## days_tx:dietFresh                 
## days_tx:dietMaintenance     -0.005
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -2.1717011 -0.5197665 -0.1350476  0.4407227  3.3563549 
## 
## Number of Observations: 159
## Number of Groups: 40
anova(lm.test)

Viruses

library(nlme)
vir <- metadata_kraken%>% 
  filter(Taxa_bracken %in% c("Viruses")) 
lm.test <- lme(reads_bracken ~ days_tx*temperature_Celsius+days_tx+Treatment, random=~1|study_ID, data = vir)
summary(lm.test)
## Linear mixed-effects model fit by REML
##   Data: vir 
##        AIC      BIC    logLik
##   523.1159 534.3923 -254.5579
## 
## Random effects:
##  Formula: ~1 | study_ID
##         (Intercept) Residual
## StdDev:  0.01861029 144.0987
## 
## Fixed effects:  reads_bracken ~ days_tx * temperature_Celsius + days_tx + Treatment 
##                                  Value Std.Error DF    t-value p-value
## (Intercept)                 -168.55820 201.00318 29 -0.8385847  0.4086
## days_tx                        9.27499   3.69596  8  2.5094961  0.0364
## temperature_Celsius           12.77461   9.31988  8  1.3706840  0.2077
## TreatmentAntibiotic           38.81898  46.40890 29  0.8364556  0.4097
## days_tx:temperature_Celsius   -0.50682   0.18145  8 -2.7931092  0.0234
##  Correlation: 
##                             (Intr) dys_tx tmpr_C TrtmnA
## days_tx                     -0.843                     
## temperature_Celsius         -0.979  0.842              
## TreatmentAntibiotic         -0.266  0.120  0.118       
## days_tx:temperature_Celsius  0.801 -0.971 -0.825 -0.074
## 
## Standardized Within-Group Residuals:
##        Min         Q1        Med         Q3        Max 
## -1.6753831 -0.5989529 -0.2208254  0.2345638  3.0345107 
## 
## Number of Observations: 42
## Number of Groups: 31
anova(lm.test)

Microbiome abundance

Microbiome Abundance table

abundance_feature <- metadata %>% 
  select(normalized_abundance, Treatment, Time_tx, sample_ID,Taxa) %>% 
  group_by(Treatment, Time_tx, sample_ID) %>% 
  dplyr::summarise(Abundance = sum(normalized_abundance)) %>% 
  as.data.frame() %>% mutate(Type = "Microbiome")

abundance_feature$Treatment <- factor(abundance_feature$Treatment, levels= c("Control","Antibiotic"))

Microbiome Abundance plot

#Calculating p-values between treatments by time point
stat.test <- abundance_feature %>% 
  filter(sample_ID %notin% c("MA028.7")) %>% 
  group_by(Time_tx) %>%
  wilcox_test(Abundance ~ Treatment, alternative = "greater", paired = T) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance("p.adj") %>%
  add_xy_position(x = "Time_tx", dodge = 0.8)
stat.test
#Boxplot of Observed diversity between treatment groups over time
Abundance_boxplot = abundance_feature %>%  
  ggboxplot(x = "Time_tx", y = "Abundance", 
            color = "Treatment", palette = "jama", fill = "Treatment", alpha = 0.5,
            add = c("jitter"), notch = F, outlier.shape = NA, facet.by = "Type",
            xlab = F, ylab = "Abundance score") +
  ylim(2e6,7.3e6) +
  stat_pvalue_manual(stat.test,  label = "p", tip.length = 0)

Abundance_boxplot

Phylum abundance plot

library(data.table)
library(tidyr)
class <- metadata %>% 
  select(normalized_abundance, Treatment, Time_tx, sample_ID, Taxa) %>% 
  filter(Taxa %like% "c__")

phylum <- metadata %>% filter(Taxa %notin% class$Taxa) %>% 
  filter(Taxa %like% "p__") %>% 
  group_by(Treatment, Time_tx, Taxa) %>% 
  dplyr::summarise(Abundance = mean(normalized_abundance)) %>% 
  separate(Taxa, c("Kingdom", "Phylum"), "p__") 

top_taxa <- phylum %>% group_by(Phylum) %>% summarise(Ab = sum(Abundance)) %>% top_n(6) %>% 
  union(data.frame (Phylum  = "Others", Ab = 0)) %>% arrange((Ab))
  
phylum$Top_Taxa <- ifelse(phylum$Phylum %in% top_taxa$Phylum, phylum$Phylum, "Others")

phylum$Top_Taxa <- factor(phylum$Top_Taxa, levels = c("Actinobacteria","Firmicutes","Bacteroidetes","Euryarchaeota","Proteobacteria","Kiritimatiellaeota","Others"))

phylum_plot <- phylum %>% 
  ggbarplot(x = "Treatment", y = "Abundance",
            color = "Top_Taxa", fill = "Top_Taxa", palette = get_palette("npg", 7),
            xlab = F, ylab = "Mean normalized abundance",
            legend = "right",
            #position = position_fill(),
            facet.by = "Time_tx", nrow = 1
            ) +
  labs(color = "Taxa", fill = "Taxa")
phylum_plot

Microbiome diversity

Phyloseq table formatting

Feature table used for alpha diversity: Matrix with sequence_id as columns and features as rows

library(tibble)
#The number of reads is used for alpha diversity since it is measures a "within-sample" diversity
feature_alpha <- metadata %>% select(sequence_id,Taxa,Reads) %>% 
  spread(sequence_id, Reads) %>% 
  remove_rownames %>% column_to_rownames(var="Taxa") %>% 
  as.matrix(rownames=TRUE)
feature_alpha[is.na(feature_alpha)] <- 0

Taxonomy table

taxonomy <- microbiome %>% select(Taxa) %>% distinct() %>%
  mutate(Feature = Taxa) %>% 
  remove_rownames %>% column_to_rownames(var="Taxa") %>% 
  as.matrix()

Metadata to matrix

metadata_matrix <- merge(data,metagenome_stats) %>% 
  remove_rownames %>% column_to_rownames(var="sequence_id")

Phyloseq object

library("phyloseq")
mode(feature_alpha) <- "integer"
feature_alpha[is.na(feature_alpha)] <- 0

# Making phyloseq objects
OTU = otu_table(feature_alpha,taxa_are_rows=TRUE)
TAX = tax_table(taxonomy)
META = sample_data(metadata_matrix)

# finaly phyloseq object with feature table, taxonomy and metadata
physeq=phyloseq(OTU,TAX,META)

Alpha diversity

Calculation of Shannon, Observed, and Chao1

alpha_diversity <- estimate_richness(physeq, measures = c("Shannon", "Observed", "Chao1"))
df_alpha <- data.frame(alpha_diversity, sample_data(physeq))
alpha_table <- reshape2::melt(df_alpha, measure.var=c("Shannon","Observed","Chao1"),id.vars=c("Time_tx","Treatment","sample_ID","study_ID","Block","Cohort")) %>% 
  rename(Index = variable) %>% mutate(Type = "Microbiome")
alpha_table$value = as.numeric(alpha_table$value)
alpha_table$Treatment <- factor(alpha_table$Treatment, levels= c("Control","Antibiotic"))

Shannon diversity plot

#Calculating p-values between treatments by time point
stat.test <- alpha_table %>% 
  filter(Index == "Shannon", sample_ID %notin% c("MA028.7")) %>% 
  group_by(Time_tx) %>%
  wilcox_test(value ~ Treatment, alternative = "greater", paired = T) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance("p.adj") %>%
  add_xy_position(x = "Time_tx", dodge = 0.8)
stat.test
#Boxplot of Shannon diversity between treatment groups over time
shannon_boxplot = alpha_table %>% 
  filter(Index == "Shannon") %>% 
   ggboxplot(x = "Time_tx", y = "value", 
            color = "Treatment", palette = "jama", fill = "Treatment", alpha = 0.5,
            add = c("jitter"), notch = F, outlier.shape = NA, facet.by = "Type",
            xlab = F, ylab = "Shannon Index", legend = "none") +
  ylim(1.8,4.4)+
  stat_pvalue_manual(stat.test,  label = "p", tip.length = 0)

shannon_boxplot

Observed diversity

#Calculating p-values between treatments by time point
stat.test <- alpha_table %>% 
  filter(Index == "Observed", sample_ID %notin% c("MA028.7")) %>% 
  group_by(Time_tx) %>%
  wilcox_test(value ~ Treatment, alternative = "greater", paired = T) %>%
  adjust_pvalue(method = "bonferroni") %>%
  add_significance("p.adj") %>%
  add_xy_position(x = "Time_tx", dodge = 0.8)
stat.test
Observed_boxplot = alpha_table %>% 
  filter(Index == "Observed") %>% 
   ggboxplot(x = "Time_tx", y = "value", 
            color = "Treatment", palette = "jama", fill = "Treatment", alpha = 0.5,
            add = c("jitter"), notch = F, outlier.shape = NA, facet.by = "Type",
            xlab = F, ylab = "Observed Index") +
  ylim(500,2100)+
  stat_pvalue_manual(stat.test,  label = "p", tip.length = 0)

Observed_boxplot

Beta diversity

Feature table used for beta diversity: Matrix with sequence_id as columns and features as rows

# Normalized abundance was used for beta diversity since it is a measure of diversity between samples (microbiome ecosystems)
feature_table <- metadata %>% select(sequence_id,Taxa,normalized_abundance) %>% 
  spread(sequence_id, normalized_abundance) %>% 
  remove_rownames %>% column_to_rownames(var="Taxa") %>% 
  as.matrix(rownames=TRUE)
feature_table[is.na(feature_table)] <- 0

Phyloseq object for beta diversity

# changing OTU table using the feature table with normalized abundances for beta diversity and converting it to a phyloseq objects
OTU <- otu_table(feature_table,taxa_are_rows=TRUE)

# finaly phyloseq object with feature table, taxonomy and metadata
physeq=phyloseq(OTU,TAX,META)

Permanova by Time

library(microbial)
beta <- microbial::betatest(physeq, group="Time_tx", distance = "bray")
beta

Bray-Curtis plot

bray_microbiome <- plotbeta(
  physeq,
  group="Time_tx",
  shape = "Treatment",
  distance = "bray",
  method = "PCoA",
  color = F,
  size = 3,
  ellipse = F) + 
  labs(color = "Time", shape = "Treatment", fill="Time") + 
  annotate("text", x = -0.1, y = 0.25, 
           label = expression(paste("PERMANOVA, ",F ,"= 23.68, ",paste(italic('p')),"=0.001")), 
           colour = "black", size = 4) + 
  ggtitle("Microbiome") +
  stat_ellipse(geom = "polygon",
               aes(fill = Time_tx, group=Time_tx), 
               alpha = 0.1)+
  scale_fill_aaas() +
  scale_color_aaas()+
  theme_bw()
bray_microbiome

Arranging microbiome plots

microbiome_plots <- ggarrange(Abundance_boxplot,phylum_plot,shannon_boxplot,bray_microbiome, labels = c("A","C","B","D"), widths = c(1,2,1,2))
microbiome_plots

Saving plot

setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/figures")
ggsave(plot=microbiome_plots, "Fig4_microbiome.png", width = 13, height = 7)

Differential abundance analyses

Aggregate phyloseq objects by time points

physeq_day1 <- subset_samples(physeq, Time_tx%in%"Day -1")
physeq_week1 <- subset_samples(physeq, Time_tx%in%c("Week 1"))
physeq_week5 <- subset_samples(physeq, Time_tx%in%c("Week 5"))
physeq_week9 <- subset_samples(physeq, Time_tx%in%c("Week 9"))

Day 1 (other time points were done in the same way)

physeq <- physeq_day1

LEfSE

lefse <- microbial::ldamarker(physeq, group="Treatment", pvalue = 0.05, normalize = T,method = "log2")
lefse_sigtab <- lefse[which(lefse$p.value<0.05), ]
lefse_sigtab <- lefse_sigtab %>% select(rank, direction,tax, LDAscore, p.value, p.adj) %>% arrange(tax)

ANCOMBC

library(nloptr)
library(ANCOMBC)
#ANCOMBC analysis comparison between treatment groups
out = ancombc(data = physeq, formula = "Treatment", 
              p_adj_method = "holm", lib_cut = 0,
              group = "Treatment", struc_zero = F, neg_lb = FALSE,
              tol = 1e-05, max_iter = 100, conserve = TRUE,
              alpha = 0.05, global = TRUE)

#ANCOMBC results as a list
res = out$res

#ANCOMBC results as a table
ancom_results = res %>% as.data.frame()

# Filtering only significant results
ancom_signif <- ancom_results %>% dplyr::filter(p_val.TreatmentControl <= 0.05)

Saving Lefse and ANCOM-BC results as an Excel file

#setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/tables/differential/microbiome_metaphlan")

#library(xlsx)
#write.xlsx(as.data.frame(lefse_sigtab), file="microbiome_day1_treatment_diff.xlsx", sheetName="lefse", row.names=FALSE)
#write.xlsx(ancom_signif, file="microbiome_day1_treatment_diff.xlsx", sheetName="ancombc", append=TRUE, row.names=FALSE)

MaAsLin2

library(Maaslin2)
setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/tables/differential/microbiome_metaphlan")
# Formating abundance table for MaAsLin2 
table <- merge(physeq@tax_table,data.frame(otu_table(physeq)), by = 0) %>% remove_rownames %>% column_to_rownames(var="Row.names")

fit_data = Maaslin2(
  input_data = merge(physeq@tax_table,data.frame(otu_table(physeq)), by = 0) %>% 
    remove_rownames %>% column_to_rownames(var="Row.names")  %>% select(2:41),
  input_metadata = metadata_matrix,
  output = 'maaslin2_microbiome_day1_notemp',
  fixed_effects = c('Treatment'),
#  random_effects = c("temperature_Celsius"),
  min_prevalence = 0.01,
  min_abundance =  0.0,
  standardize = T
)

Plots of differential abundance analyses

Importing results manually curated and compared between pipelines

library(readxl)

setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/tables/microbiome/differential")
diff_taxa <- read_excel("summary_differential_taxa.xlsx", sheet = "taxa")

Calculating mean abundance per taxa and time point

mean_taxa <- metadata %>% select(Taxa,Time_tx,normalized_abundance) %>% group_by(Taxa,Time_tx) %>% summarise(mean_taxa = mean(normalized_abundance))

Generating a table with differential abundance and mean taxa

differential <- merge(metadata, diff_taxa, by = "Taxa") %>% 
  merge(mean_taxa, by = c("Taxa","Time_tx"))
differential$Treatment <- factor(differential$Treatment, levels= c("Control","Antibiotic"))

Differential plot week 1

mean_w1 <- differential %>%   
  filter(`Week 1` >= 1) %>% 
  select(Taxa,normalized_abundance) %>% 
  group_by(Taxa) %>% 
  dplyr::summarise(mean_taxa = mean(normalized_abundance)) 
mean_w1$mean_taxa <- as.numeric(as.character(mean_w1$mean_taxa))

diff_week1 <- differential %>% 
  filter(`Week 1` >= 1,Time_tx=="Week 1") %>%
  merge(mean_w1, by = "Taxa") %>% 
  select(id,normalized_abundance,Treatment,mean_taxa.x) %>% 
  mutate(fc = normalized_abundance/mean_taxa.x) %>% 
  select(id,fc,Treatment) %>% mutate(time = "Week 1")
diff_week1$fc <- as.numeric(as.character(diff_week1$fc))
order_taxa <- diff_week1 %>% filter(Treatment =="Antibiotic") %>%
  select(id,fc) %>% 
  group_by(id) %>% 
  dplyr::summarise(mean_fc = mean(fc)) %>% 
  arrange(mean_fc)

week1_plot <- diff_week1 %>% 
  ggbarplot(x="id", y = "fc", add="mean_se",color = "Treatment", fill = "Treatment",palette = "jama",orientation = "horiz", alpha = 0.5,legend = "right", ylab = "Mean fold change", xlab = "",facet.by = "time", order = order_taxa$id) +
  geom_hline(yintercept = 1, linetype="dotted", 
                color = "black", size=1)
#+ ylim(0,2)
week1_plot

Differential plot week 5

mean_w5 <- differential %>%   
  filter(`Week 5` >= 1) %>% 
  select(Taxa,normalized_abundance) %>% 
  group_by(Taxa) %>% 
  dplyr::summarise(mean_taxa = mean(normalized_abundance)) 
mean_w5$mean_taxa <- as.numeric(as.character(mean_w5$mean_taxa))

diff_week5 <- differential %>% 
  filter(`Week 5` >= 1,Time_tx=="Week 5") %>%
  merge(mean_w5, by = "Taxa") %>% 
  select(id,normalized_abundance,Treatment,mean_taxa.x) %>% 
  mutate(fc = normalized_abundance/mean_taxa.x) %>% 
  select(id,fc,Treatment) %>% mutate(time = "Week 5")
diff_week5$fc <- as.numeric(as.character(diff_week5$fc))
order_taxa <- diff_week5 %>% filter(Treatment =="Antibiotic") %>%
  select(id,fc) %>% 
  group_by(id) %>% 
  dplyr::summarise(mean_fc = mean(fc)) %>% 
  arrange(mean_fc)

week5_plot <- diff_week5 %>% 
  ggbarplot(x="id", y = "fc", add="mean_se",color = "Treatment", fill = "Treatment",palette = "jama",orientation = "horiz", alpha = 0.5, legend = "right", ylab = "Mean fold change", xlab = "", facet.by = "time", order = order_taxa$id)+
  geom_hline(yintercept = 1, linetype="dotted", 
                color = "black", size=1) #+ ylim(0,2)
week5_plot

Differential plot week 9

mean_w9 <- differential %>%   
  filter(`Week 9` >= 1) %>% 
  select(Taxa,normalized_abundance) %>% 
  group_by(Taxa) %>% 
  dplyr::summarise(mean_taxa = mean(normalized_abundance)) 
mean_w9$mean_taxa <- as.numeric(as.character(mean_w9$mean_taxa))

diff_week9 <- differential %>% 
  filter(`Week 9` >= 1,Time_tx=="Week 9") %>%
  merge(mean_w9, by = "Taxa") %>% 
  select(id,normalized_abundance,Treatment,mean_taxa.x) %>% 
  mutate(fc = normalized_abundance/mean_taxa.x) %>% 
  select(id,fc,Treatment) %>% mutate(time = "Week 9")
diff_week9$fc <- as.numeric(as.character(diff_week9$fc))
order_taxa <- diff_week9 %>% filter(Treatment =="Antibiotic") %>%
  select(id,fc) %>% 
  group_by(id) %>% 
  dplyr::summarise(mean_fc = mean(fc)) %>% 
  rbind(data_frame(id="p:Firmicutes|c:CFGB8350",mean_fc=0)) %>% 
    arrange(mean_fc)

week9_plot <- diff_week9 %>% 
  ggbarplot(x="id", y = "fc", add="mean_se",color = "Treatment", fill = "Treatment",palette = "jama",orientation = "horiz", alpha = 0.5,legend = "right", ylab = "Mean fold change", xlab = "", facet.by = "time",order = order_taxa$id) +
  geom_hline(yintercept = 1, linetype="dotted", 
                color = "black", size=1)#+ ylim(0,2)
week9_plot

Saving differential abundance plots

setwd("/Users/karlavasco/Library/CloudStorage/OneDrive-MichiganStateUniversity/Manning_lab/Mastitis_project/metagenome_KV/figures")
diff_plots <- ggarrange(week1_plot,week5_plot,week9_plot,common.legend = T,ncol=1,nrow = 3, align = ("hv"))
ggsave(plot=diff_plots, "diff_taxa_microbiome4.png", width = 10, height = 15)

diff_plots